# NOTE(review): rm(list = ls()) only removes objects from the global
# environment; it does not detach packages or reset options, so it is not a
# substitute for running the script in a fresh R session.
rm(list = ls())

# Packages used by this script. plyr is attached before dplyr on purpose:
# attaching them the other way round lets plyr::mutate() mask dplyr::mutate(),
# and the grouped mutate() calls further down would then ignore group_by().
library(plyr)       # ddply()
library(dplyr)      # %>%, group_by(), mutate(), row_number()
library(ggplot2)    # density plots, ggsave()
library(ggpubr)     # ggarrange()
library(plm)        # pdata.frame(), plm(), vcovHC()
library(stargazer)  # regression tables

# Input data: one CSV per gender / sample combination, read from the current
# working directory.
d.all.men <- read.csv("data-distribution-all-men.csv")
d.all.women <- read.csv("data-distribution-all-women.csv")
d.white.men <- read.csv("data-distribution-white-men.csv")
d.white.women <- read.csv("data-distribution-white-women.csv")

# Figure helpers ------------------------------------------

# Mean of `meansim` within each `party`, returned as a data frame with the
# columns `party` and `grp.mean`. Missing scores are dropped so that a single
# NA cannot turn a party's mean into NA and make its dashed line disappear
# from the plot.
party_means <- function(data) {
  ddply(data, "party", summarise, grp.mean = mean(meansim, na.rm = TRUE))
}

# Density of `meansim` by party with a dashed vertical line at each party mean.
#   data  - data frame with the columns `meansim` and `party`
#   means - per-party means as returned by party_means(data)
#   title - plot title
# Returns the ggplot object.
resemblance_density <- function(data, means, title) {
  # Unnamed colour values; ggplot2 hands them to the party levels in order.
  party.colours <- c("skyblue", "tomato")

  ggplot(data, aes(x = meansim, color = party, fill = party)) +
    geom_density(alpha = 0.7, lwd = 1.1) +
    theme_minimal() +
    ylab("Density") +
    xlab("Resemblance") +
    geom_vline(
      data = means, aes(xintercept = grp.mean, color = party),
      linetype = "dashed", lwd = 1.2
    ) +
    scale_color_manual(values = party.colours) +
    scale_fill_manual(values = party.colours) +
    theme(
      axis.text = element_text(size = 12),
      axis.title = element_text(size = 14),
      legend.position = "top"
    ) +
    ggtitle(title)
}

# FigureC1 ------------------------------------------

w1 <- party_means(d.all.women)
gg1 <- resemblance_density(d.all.women, w1, "All Women")

w2 <- party_means(d.white.women)
gg2 <- resemblance_density(d.white.women, w2, "White Women")

# Keep the arranged figure in a variable and hand it to ggsave() explicitly
# instead of relying on whatever ggplot2 last registered as last_plot().
fig.c1 <- ggarrange(gg1, gg2, common.legend = TRUE)
fig.c1  # displayed when the script is run interactively, as before
ggsave("FigureC1.pdf", plot = fig.c1, width = 15, height = 6, units = "in")

# Figure2 ----------------------------------------------------

m1 <- party_means(d.all.men)
gg3 <- resemblance_density(d.all.men, m1, "All Men")

m2 <- party_means(d.white.men)
gg4 <- resemblance_density(d.white.men, m2, "White Men")

fig.2 <- ggarrange(gg3, gg4, common.legend = TRUE)
fig.2  # displayed when the script is run interactively, as before
ggsave("Figure2.pdf", plot = fig.2, width = 15, height = 6, units = "in")



# Table1 -----------------------------------------------------------------
#
# Within (individual fixed-effects) models of `nexthouse` on `meansim` plus
# Congress dummies, fitted on the white-men sample: pooled, Democrats only and
# Republicans only. Standard errors are clustered by member via vcovHC().

# Number each member's rows in Congress order (`t`) and record that member's
# row count (`T`). The dplyr verbs are namespace-qualified because a bare
# mutate() resolves to plyr::mutate() whenever plyr is attached after dplyr,
# and plyr::mutate() silently ignores group_by().
d.reg <- d.white.men[order(d.white.men$id, d.white.men$congress), ]
d.reg <- d.reg %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(t = dplyr::row_number(id)) %>%
  dplyr::mutate(T = max(t)) %>%
  dplyr::ungroup()
# Hand plm a plain data frame rather than a (grouped) tibble.
d.reg <- as.data.frame(d.reg)

# nexthouse = 1 when the member has a later row in the data, 0 on the member's
# last row. Rows for Congress 116 are set to NA.
# NOTE(review): presumably 116 is the final Congress in the data, so a later
# appearance cannot be observed for it -- confirm.
# NOTE(review): `t < T` only checks that a later row exists; it does not check
# that the later row is the immediately following Congress -- confirm that
# this is the intended definition.
d.reg$nexthouse <- ifelse(d.reg$t < d.reg$T, 1, 0)
d.reg$nexthouse <- ifelse(d.reg$congress == 116, NA, d.reg$nexthouse)
d.reg <- pdata.frame(d.reg, index = c("id"))

# NOTE(review): `cluster = "id"` does not appear to be a documented plm()
# argument and looks like it is swallowed by `...`; the clustering that
# reaches the table comes from vcovHC() below. Left in place to keep the
# calls unchanged -- confirm and drop.
plm.both <- plm(nexthouse ~ meansim + factor(congress),
                data = d.reg,
                model = "within", effect = "individual", cluster = "id")
plm.demo <- plm(nexthouse ~ meansim + factor(congress),
                data = d.reg[d.reg$party == "Democrat", ],
                model = "within", effect = "individual", cluster = "id")
plm.repu <- plm(nexthouse ~ meansim + factor(congress),
                data = d.reg[d.reg$party == "Republican", ],
                model = "within", effect = "individual", cluster = "id")

# Standard errors from the HC1 covariance matrix clustered on the group index.
clustered_se <- function(model) {
  sqrt(diag(vcovHC(model, cluster = "group", type = "HC1")))
}
robust.se <- lapply(list(plm.both, plm.demo, plm.repu), clustered_se)

stargazer(plm.both, plm.demo, plm.repu,
          se = robust.se, keep = "meansim", type = "text")
stargazer(plm.both, plm.demo, plm.repu,
          se = robust.se, keep = "meansim", type = "latex", out = "Table1.tex")

# Number of distinct members among rows where nexthouse, meansim and congress
# are all non-missing; the count is reported under TRUE in the printed table.
count_members <- function(df) {
  complete <- !is.na(df$nexthouse) & !is.na(df$meansim) & !is.na(df$congress)
  table(!is.na(unique(df$id[complete])))
}
count_members(d.reg)
count_members(d.reg[d.reg$party == "Democrat", ])
count_members(d.reg[d.reg$party == "Republican", ])


